Question 1

df <- read_csv("global_power_plant_database_v_1_3/global_power_plant_database.csv", show_col_types = FALSE)

# Pre-add continent for problem set.
# countrycode() translates each country's ISO3 code into its continent name.
df$continent <- countrycode(sourcevar = df$country,     # country codes as a vector
                            origin = "iso3c",           # coding scheme used by the source data
                            destination = "continent")  # coding scheme to convert to
#### Commenting out the code block dealing with geocoding 

### Getting the values for only United States 
#   df_US <- df %>% 
#            filter(country == "USA")
#   address <- df_US %>% 
#              reverse_geo(lat = df_US$latitude, long = df_US$longitude, method = 'osm')
## Converting to Dataframe and renaming the columns 
#  adr <- tibble(Lat = address[1], Long = address[2], Address = address[3])
#  
#  adr <- adr %>% 
#         rename(latitude = Lat, longitude = Long, Address = Address)
#  
#  ## Unlisting the elements 
#  adr$latitude <- unlist(adr$latitude)
#  adr$longitude <- unlist(adr$longitude)
#  adr$Address <- unlist(adr$Address)
#  
#  ## Joining the tables 
#  df_US <- df_US %>% 
#           select(name, capacity_mw, latitude, longitude) %>% 
#           left_join(adr, by = c('latitude' = 'latitude', 'longitude' = 'longitude'))
#  ### Filtering out only the required States 
#  states <- df_US$Address
#  
#  ### Getting the address in Virginia and West Virginia 
#  ind_vir <- str_which(states, "Virginia,")
#  
#  ### Getting the address in Maryland 
#  ind_mary <- str_which(states, "Maryland,") 
#  
#  ### Getting the address in DC
#  ind_DC <- str_which(states, "District of Columbia,")
#  
#  ### Filtering out the required rows 
#  states_ind <- sort(c(ind_vir, ind_mary, ind_DC))
#  df_US_dmv <- df_US[states_ind, ]
#  
#  ### Adding a State column 
#  for(row in 1:nrow(df_US_dmv)) {
#    if(length(str_subset(df_US_dmv$Address[row], "West Virginia,"))){
#        df_US_dmv$region[row] = "west virginia"
#        df_US_dmv$group[row] = 4
#    
#    }else if(length(str_subset(df_US_dmv$Address[row], ", Virginia,"))) {
#        df_US_dmv$region[row] = "virginia"
#        df_US_dmv$group[row] = 3
#        
#    }else if(length(str_subset(df_US_dmv$Address[row], "Maryland,"))) {
#        df_US_dmv$region[row] = "maryland"
#        df_US_dmv$group[row] = 2
#        
#    }else {
#      df_US_dmv$region[row] = "district of columbia"
#      df_US_dmv$group[row] = 1
#    }
#  }
#  
#  ### Renaming to lat and long, needed for mapping! Also filtering outliers
#  df_US_dmv <- df_US_dmv %>% 
#               rename(lat = latitude, long = longitude) %>% 
#               filter(lat < 42)
#  
#  ### Saving the dataframe for future use 
#  write.csv(df_US_dmv, "Pset2_Q1.csv", row.names = F)
### City coordinates from the maps package
data("us.cities")

### Load the previously saved geocoded plant data
df_US_dmv <- read.csv("Pset2_Q1.csv")

### DMV-area cities to label on the map
dmv_city_names <- c(
  "Huntington WV", "Charleston WV", "Frederick MD", "Bel Air South MD",
  "Dale City VA", "Harrisonburg VA", "Lynchburg VA", "Roanoke VA",
  "Blacksburg VA", "Danville VA", "Suffolk VA", "Portsmouth VA",
  "Newport News VA", "Richmond VA", "Tuckahoe VA", "Hampton VA",
  "Norfolk VA", "Virginia Beach VA", "Chesapeake VA"
)
major_cities <- us.cities %>%
  filter(name %in% dmv_city_names)

### State and county outlines for the four DMV jurisdictions
regions <- c("virginia", "maryland", "west virginia", "district of columbia")
state_projection <- map_data("state", region = regions)
county_projection <- map_data("county", region = regions)

### Base map: white counties with light-grey borders, black state outlines
### on top, drawn with a polyconic projection.
base_map <- ggplot(county_projection, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group), fill = "white", color = "lightgrey") +
  geom_path(
    data = state_projection,
    aes(x = long, y = lat, group = group),
    color = "black"
  ) +
  coord_map("polyconic")

### Overlay power-plant locations (sized by capacity, colored by state)
### and city name labels on the base map.
### NOTE(review): the legend labels below rely on the region values sorting
### alphabetically (district of columbia, maryland, virginia, west virginia).
map_1 <- base_map +
  geom_point(data = df_US_dmv, aes(color = region, size = capacity_mw), alpha = 0.4) +
  geom_text_repel(data = major_cities, aes(label = name), size = 2.3) +
  scale_color_discrete(labels = c("District of Columbia", "Maryland", "Virginia", "West Virginia")) +
  labs(
    title = "Maryland has Largest Number of Powerplants ",
    subtitle = "Locations of powerplants in DC, Maryland, Virginia and West Virginia",
    color = "State",
    size = "Capacity (MW)"
  ) +
  theme_void()  # strip all non-data ink


# ggsave("Map_1.png", map_1, dpi = 200)  ## Saving the map

map_1

Question 2

### Pull median household income by county from the Census API
### (2016-2020 5-year ACS, variable B19013_001), with geometry for mapping.

#census_api_key("")

options(tigris_use_cache = TRUE)  # cache downloaded shapefiles between runs

virginia_county <- get_acs(
  geography = "county",
  state = "VA",
  variables = "B19013_001",  # median household income
  year = 2020,
  geometry = TRUE
)

### Jurisdictions counted as Northern Virginia
northern_virginia_counties <- c(
  "Alexandria city", "Arlington County", "Clarke County", "Culpeper County",
  "Fairfax city", "Fairfax County", "Falls Church city", "Fauquier County",
  "Frederick County", "Fredericksburg city", "Loudoun County", "Madison County",
  "Manassas city", "Manassas Park city", "Prince William County",
  "Rappahannock County", "Spotsylvania County", "Stafford County",
  "Warren County", "Winchester city"
)

### Extract the bare county name (the text before the first comma) from NAME,
### e.g. "Fairfax County, Virginia" -> "Fairfax County", then keep only the
### Northern Virginia jurisdictions. sub() replaces the earlier
### strsplit/as.data.frame/t pipeline, which errored whenever the comma
### splits produced vectors of unequal length.
NV_data <- virginia_county %>%
  mutate(County = sub(",.*", "", NAME)) %>%
  filter(County %in% northern_virginia_counties)

### Median of the county-level median household income estimates for NoVA.
### na.rm guards against counties whose ACS estimate is missing.
nova_est <- median(NV_data$estimate, na.rm = TRUE)

### Dissolve the NoVA counties into a single region geometry.
### summarize() with no arguments unions the county geometries.
nv_union <- NV_data %>%
  summarize()

nv_union <- st_cast(nv_union, "MULTIPOLYGON")  ### Casting to MULTIPOLYGON as summarize converts to POLYGON

### One-row frame representing the combined Northern Virginia region
nv_final <- data.frame(NAME = "Northern Virginia", estimate = nova_est, geometry = nv_union$geometry)

### Final mapping frame: every Virginia county EXCEPT the Northern Virginia
### ones (the single NoVA row built above replaces them below).
### sub() extracts the county name before the first comma; it replaces the
### fragile strsplit/as.data.frame/t pipeline that failed on uneven splits.
map_df <- virginia_county %>%
  mutate(County = sub(",.*", "", NAME)) %>%
  filter(!County %in% northern_virginia_counties) %>%
  select(NAME, estimate, geometry)

### Process to add the Northern Virginia record to map_df
### Source: https://github.com/r-spatial/sf/issues/588
### Step 1: convert both data frames to data.table
### Step 2: combine them with data.table::rbindlist()
### Step 3: convert back to sf using st_sf()

map_df <- setDT(map_df)
nv_final <- setDT(nv_final)
dt_list <- list(map_df, nv_final)  ### rbindlist takes in a list object
map_df <- data.table::rbindlist(dt_list, fill = TRUE)
map_df <- st_sf(map_df)
### Mapping 

nv_final <- st_sf(nv_final)  ### Converting back to sf object 
min_income <- min(map_df$estimate)
max_income <- max(map_df$estimate)
divide_by <- 1000 ### For legend formatting

map_2 <- map_df %>% 
         ggplot(aes(fill = estimate/divide_by)) +
         geom_sf(color = NA) +
         scale_fill_viridis_c(option = "magma", limits = c(min_income, max_income)/divide_by, labels = label_dollar(suffix = 'K')) + 
         geom_sf_text(data = nv_final, label = "NoVA") +
         labs(title = "Northern Virginia has some of the Highest Median Incomes in the State", subtitle = "Median household incomes across counties in Virginia", fill = "Median Income") +
         theme_void()   ### Removing all unneeded ink

map_2

#ggsave("Map_2.png", map_2, dpi = 200)          

Question 3

### Signed Trump margin: positive where Trump won, negative where he lost.
election_data <- election
election_data$Trump_Pct_Margin <- ifelse(
  election_data$winner == "Trump",
  election_data$pct_margin,
  -election_data$pct_margin
)

### Hex-tile state geometry from the tilegramR package
state_shape <- sf_NPR1to1

### Attach election results to the tile geometry (state abbreviation key)
states_election <- left_join(state_shape, election_data, by = c("state" = "st"))
## (console output) old-style crs object detected; please recreate object with a recent sf::st_crs()

### Mapping: diverging fill for the margin, outline color for the winner.
### NOTE(review): scale_color_manual(values = c("blue", "red")) relies on the
### winner values sorting as Clinton, Trump — confirm against the data.
map_3 <- states_election %>%
  ggplot() +
  geom_sf(aes(fill = Trump_Pct_Margin, color = winner)) +
  geom_sf_text(aes(label = state), color = "white") +
  scale_color_manual(values = c("blue", "red")) +
  scale_fill_gradient2(
    low = "darkblue",
    mid = "grey",
    high = "darkred",
    midpoint = 0.0,
    labels = label_number(accuracy = 0.1, suffix = "%")
  ) +
  labs(
    title = "Trump Won in 10 States With Less Than 0.1% Margin",
    subtitle = "2016 Presidential election results and vote margins",
    fill = "Trump Margin",
    color = "Winner"
  ) +
  theme_void()

#ggsave("Map_3.png", map_3, dpi = 200)
map_3

Question 4

##### NOTE #####
#### tidyquant depends on the forecast package; to build it, the FLIBS
#### variable was set to /opt/homebrew/bin/gcc/12.2.0 to point to gfortran.


### Daily prices for the FAANG companies
df_faang <- tq_get(c("META", "AAPL", "AMZN", "NFLX", "GOOG"),  ### Ticker symbols for FAANG companies
           get = "stock.prices",
           from = "2012-01-01",
           to = "2022-11-10")

max_trading_volume <- max(df_faang$volume)
divide_by <- 1000000  ### Show volume in millions of shares

### Log-scale scatter of trading volume vs adjusted price.
### BUGFIX: the legend labels are now keyed by ticker. The previous
### positional vector c("Amazon", "Apple", ...) was applied to the
### alphabetically sorted levels (AAPL, AMZN, GOOG, META, NFLX), which
### mislabeled AAPL as "Amazon" and AMZN as "Apple".
plot_1 <- ggplot(df_faang, aes(x = volume / divide_by, y = adjusted, color = symbol)) +
          scale_x_continuous(trans = "log10", limits = c(1e+06, max_trading_volume) / divide_by, labels = label_number(suffix = "MM", accuracy = 1)) +
          scale_y_continuous(labels = label_dollar()) +
          scale_color_discrete(labels = c(AAPL = "Apple", AMZN = "Amazon", GOOG = "Google", META = "Meta", NFLX = "Netflix")) +
          geom_point(alpha = 0.8, size = 7)

### Animate through time, fading a trail behind each point
plot_2 <- plot_1 +
          transition_time(date) +
          ease_aes("linear") +
          labs(title = "Adjusted Price and Volume for FAANG Stocks: {frame_time}", x = "Trading Volume", y = "Adjusted Price", color = "") +
          shadow_wake(wake_length = 0.5)

animate(plot_2, renderer = magick_renderer(), nframes = 300)

Question 5

### Load the NICS firearm background-check data
gun_checks_df <- read.csv("nics-firearm-background-checks.csv")

### Change in handgun checks per state between Jan 2000 and Jan 2022
handguns_df <- gun_checks_df %>%
               filter(month %in% c("2022-01", "2000-01")) %>%
               select(state, month, handgun) %>%
               mutate(region = str_to_lower(state)) %>%  # lowercase names to match map_data regions
               pivot_wider(id_cols = c(state, region), names_from = month, values_from = handgun) %>%
               mutate(Change_in_Checks = (`2022-01` - `2000-01`))  ### Change in handgun checks over the past 22 years

### Joining with state_projection
state_projection <- map_data("state")
handgun_change_states <- left_join(state_projection, handguns_df, by = "region")

### Fill limits for the scale. na.rm guards against regions with no match in
### handguns_df, whose joined Change_in_Checks is NA and would otherwise make
### the limits NA and break the fill scale.
min_change <- min(handgun_change_states$Change_in_Checks, na.rm = TRUE)
max_change <- max(handgun_change_states$Change_in_Checks, na.rm = TRUE)
divide_by <- 1000  # show counts in thousands

### Mapping
map_5 <- ggplot(handgun_change_states, aes(x = long, y = lat, group = group)) +
         geom_polygon(aes(fill = Change_in_Checks / divide_by)) +
         geom_path(color = "white") +
         scale_fill_distiller(palette = "Reds", direction = 1, limits = c(min_change, max_change) / divide_by, labels = label_number(accuracy = 1, suffix = "K")) +
         labs(title = "Florida Has the Highest Increase In Handgun Checks", subtitle = "Choropleth Map displaying the change in number of handgun checks from 2000 to 2022 across states", fill = "Increase in Handgun Checks") +
         coord_map("polyconic") +
         theme_void()


map_5

#ggsave("Map_5.png", map_5, dpi = 200)

Bonus Leaflet

states_data <- us_states()  ## State polygons from the USAboundaries package
handgun_change_states <- left_join(states_data, handguns_df, by = c("state_name" = "state"))


### Legend / fill bins for the change in handgun checks.
### NOTE(review): values outside [0, 50000] fall in no bin and get the NA
### color — confirm the data stays inside this range.
bins <- c(0, 10000, 20000, 30000, 40000, 50000)
pal <- colorBin("Reds", domain = handgun_change_states$Change_in_Checks, bins = bins)

### Interactive choropleth with hover highlight and per-state popups
leaflet(handgun_change_states) %>%
  addTiles() %>%
  setView(-96, 37.8, 4) %>%
  addPolygons(
    weight = 1,
    fillColor = ~pal(Change_in_Checks),
    fillOpacity = 0.8,
    color = "white",
    # BUGFIX: "<br/>" is the valid HTML line break; the original "</br>" is not a real tag
    popup = ~str_c(state_name, "<br/>", "Increase in Handguns: ", Change_in_Checks),
    highlightOptions = highlightOptions(
    weight = 5,
    color = "red"
  )) %>%
  addLegend(pal = pal, values = ~Change_in_Checks, opacity = 0.7, title = "Increase in Handgun Checks",
  position = "bottomright")

Question 6

### Adjusted price over time for each FAANG stock.
### BUGFIX 1: legend labels are keyed by ticker — the previous positional
### vector c("Amazon", "Apple", ...) mapped onto the alphabetically sorted
### levels (AAPL, AMZN, GOOG, META, NFLX), mislabeling AAPL as "Amazon"
### and AMZN as "Apple".
### BUGFIX 2: title typo "Hightest" corrected to "Highest".
plot_1 <- ggplot(df_faang, aes(x = date, y = adjusted, color = symbol)) +
          geom_line() +
          scale_y_continuous(labels = label_dollar()) +
          scale_color_discrete(labels = c(AAPL = "Apple", AMZN = "Amazon", GOOG = "Google", META = "Meta", NFLX = "Netflix")) +
          labs(x = "Year", y = "Adjusted Prices", title = "Netflix Stock Prices Highest Among FAANG after 2017", subtitle = "Adjusted Stock Price Trends for FAANG Companies", color = "")

### Reveal the lines over time, with a leading point per series
plot_2 <- plot_1 +
          geom_point(show.legend = FALSE) +
          transition_reveal(date)

animate(plot_2, fps = 8, end_pause = 40)  ### Default fps = 10; end_pause repeats the last frame 40 times (5 sec)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?